# In [1]:
#-----------------------------------------------------------------------
# Juan David Correa www.astropema.com March 2025
#----------------------------------------------------------------------
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
# Per-book edge lists, in reading order
_book_paths = (
    "raw_data_books/book1.csv",
    "raw_data_books/book2.csv",
    "raw_data_books/book3.csv",
    "raw_data_books/book4.csv",
    "raw_data_books/book5.csv",
)
# Columns named differently in the raw files are mapped to the names used
# below; entries whose key is absent from a file are simply ignored by rename.
_column_aliases = {'Person 1': 'Source', 'Person 2': 'Target', 'weight': 'Weight'}
# Read every file and normalise its column names in a single pass
book1, book2, book3, book4, book5 = [
    pd.read_csv(csv_path).rename(columns=_column_aliases)
    for csv_path in _book_paths
]
# Stack the five edge lists into one frame (each frame keeps its own row index)
data = pd.concat([book1, book2, book3, book4, book5])
# Preview the top of the combined frame
print(data.head())
# Create an undirected graph object
G = nx.Graph()
# Add edges to the graph.
# A character pair may appear in more than one book (in either Source/Target
# order). Calling add_edge again for an existing edge would replace its stored
# weight with the latest row's value, silently dropping the earlier books, so
# weights of repeated pairs are summed instead.
# itertuples yields plain (source, target, weight) tuples and avoids the
# per-row Series that iterrows constructs.
_edge_rows = data[['Source', 'Target', 'Weight']].itertuples(index=False, name=None)
for source, target, weight in _edge_rows:
    if G.has_edge(source, target):
        G[source][target]['weight'] += weight
    else:
        G.add_edge(source, target, weight=weight)
# Compute node coordinates with the spring layout
_layout_options = {"k": 0.15, "iterations": 20}
pos = nx.spring_layout(G, **_layout_options)
# Appearance settings handed to nx.draw
_draw_options = {
    "with_labels": True,
    "node_size": 300,
    "font_size": 10,
    "font_weight": 'bold',
    "edge_color": 'gray',
}
# Draw on a large canvas, write the image to disk, then display it
plt.figure(figsize=(50, 50))
nx.draw(G, pos, **_draw_options)
plt.savefig("game_of_thrones_network.png", format="PNG", dpi=300)
plt.show()
# Report the overall size of the graph
print(f"Graph has {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")
# Take a sample of up to ten nodes and ten edges (edges include their attribute dict)
_sample_nodes = list(G.nodes)[:10]
_sample_edges = list(G.edges(data=True))[:10]
print("Nodes:", _sample_nodes)
print("Edges:", _sample_edges)
def _top_ten(scores):
    """Return the ten (node, score) pairs with the highest score, best first."""
    ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
    return ranked[:10]


# Each measure is computed and reported in turn; no weight or distance
# argument is passed to any of the three calls.

# Degree centrality
degree_centrality = nx.degree_centrality(G)
print("Degree Centrality:", _top_ten(degree_centrality))

# Betweenness centrality
betweenness_centrality = nx.betweenness_centrality(G)
print("Betweenness Centrality:", _top_ten(betweenness_centrality))

# Closeness centrality
closeness_centrality = nx.closeness_centrality(G)
print("Closeness Centrality:", _top_ten(closeness_centrality))
# Output of print(data.head()):
#                             Source              Target        Type  Weight  \
# 0                   Addam-Marbrand     Jaime-Lannister  Undirected       3
# 1                   Addam-Marbrand     Tywin-Lannister  Undirected       6
# 2                Aegon-I-Targaryen  Daenerys-Targaryen  Undirected       5
# 3                Aegon-I-Targaryen        Eddard-Stark  Undirected       4
# 4  Aemon-Targaryen-(Maester-Aemon)      Alliser-Thorne  Undirected       4
#
#    book
# 0   1.0
# 1   1.0
# 2   1.0
# 3   1.0
# 4   1.0